In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split  # Optional, if splitting manually not used
from sklearn.metrics import (mean_squared_error,mean_absolute_error,mean_absolute_percentage_error,r2_score)
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Embedding, Flatten, Concatenate, BatchNormalization)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import os 
import random
In [4]:
def set_seed(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)        
    random.seed(seed)                               
    np.random.seed(seed)                            
    tf.random.set_seed(seed)                        

set_seed(42)

Overnights 2024 LR=0.001 Model 3¶

In [232]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [233]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['overnights_next_month'] = cool.groupby('country')['overnights'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

cool
# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['overnights_next_month'].notna()].copy()
In [234]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['overnights_next_month'].values
In [235]:
# Time-based split
train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

# Define model3
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model3 = Model(inputs=[input_numeric, input_country], outputs=output)
model3.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model3.summary()

# Train model3
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model3.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_26"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_26        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_26          │ (None, 10)        │          0 │ embedding_26[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_26      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_26[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_78 (Dense)    │ (None, 64)        │      2,304 │ concatenate_26[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_78[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_52          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_79 (Dense)    │ (None, 32)        │      2,080 │ dropout_52[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_79[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_53          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_80 (Dense)    │ (None, 1)         │         33 │ dropout_53[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 28s 11ms/step - loss: 89.2403 - val_loss: 35.5990
Epoch 2/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 24.8093 - val_loss: 2.2839
Epoch 3/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 4.6342 - val_loss: 1.7292
Epoch 4/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 3.3075 - val_loss: 1.7541
Epoch 5/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.9630 - val_loss: 1.6112
Epoch 6/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.7032 - val_loss: 1.5946
Epoch 7/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.4437 - val_loss: 1.5346
Epoch 8/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 2.0947 - val_loss: 1.5173
Epoch 9/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 1.9937 - val_loss: 1.5489
Epoch 10/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - loss: 1.8901 - val_loss: 1.5497
Epoch 11/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.7351 - val_loss: 1.6645
Epoch 12/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.6302 - val_loss: 1.5062
Epoch 13/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.6041 - val_loss: 1.5826
Epoch 14/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.4879 - val_loss: 1.6785
Epoch 15/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.4391 - val_loss: 1.5853
Epoch 16/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.4526 - val_loss: 1.5137
Epoch 17/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.3679 - val_loss: 1.5973
Epoch 18/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.4182 - val_loss: 1.5707
Epoch 19/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3537 - val_loss: 1.6190
Epoch 20/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 14ms/step - loss: 1.2815 - val_loss: 1.6774
Epoch 21/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 8ms/step - loss: 1.2749 - val_loss: 1.6142
Epoch 22/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2937 - val_loss: 1.5982
Epoch 23/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2810 - val_loss: 1.6085
Epoch 24/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2816 - val_loss: 1.6001
Epoch 25/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2587 - val_loss: 1.6219
Epoch 26/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2451 - val_loss: 1.6651
Epoch 27/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1851 - val_loss: 1.6070
Epoch 28/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2446 - val_loss: 1.6157
Epoch 29/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1416 - val_loss: 1.6052
Epoch 30/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0938 - val_loss: 1.6009
Epoch 31/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1726 - val_loss: 1.6858
Epoch 32/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1481 - val_loss: 1.6473
Epoch 33/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.1703 - val_loss: 1.6512
Epoch 34/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1190 - val_loss: 1.5767
Epoch 35/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.1113 - val_loss: 1.5925
Epoch 36/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1510 - val_loss: 1.5767
Epoch 37/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0962 - val_loss: 1.6330
Epoch 38/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1153 - val_loss: 1.5236
Epoch 39/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1686 - val_loss: 1.5506
Epoch 40/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1147 - val_loss: 1.5003
Epoch 41/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1116 - val_loss: 1.6148
Epoch 42/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1468 - val_loss: 1.5725
Epoch 43/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.0854 - val_loss: 1.5362
Epoch 44/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0844 - val_loss: 1.6029
Epoch 45/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.0681 - val_loss: 1.5509
Epoch 46/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0551 - val_loss: 1.5586
Epoch 47/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0376 - val_loss: 1.6248
Epoch 48/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0158 - val_loss: 1.5275
Epoch 49/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.0508 - val_loss: 1.5094
Epoch 50/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0701 - val_loss: 1.5162
Epoch 51/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0496 - val_loss: 1.4887
Epoch 52/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.0185 - val_loss: 1.5744
Epoch 53/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0444 - val_loss: 1.4563
Epoch 54/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0266 - val_loss: 1.5447
Epoch 55/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9903 - val_loss: 1.5421
Epoch 56/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0146 - val_loss: 1.6193
Epoch 57/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0453 - val_loss: 1.5150
Epoch 58/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0113 - val_loss: 1.4521
Epoch 59/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9473 - val_loss: 1.5612
Epoch 60/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9989 - val_loss: 1.6867
Epoch 61/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9996 - val_loss: 1.4673
Epoch 62/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.9882 - val_loss: 1.5372
Epoch 63/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9569 - val_loss: 1.4765
Epoch 64/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0222 - val_loss: 1.5254
Epoch 65/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9942 - val_loss: 1.5092
Epoch 66/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9570 - val_loss: 1.4974
Epoch 67/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9730 - val_loss: 1.5087
Epoch 68/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9471 - val_loss: 1.4377
Epoch 69/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9546 - val_loss: 1.5303
Epoch 70/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9070 - val_loss: 1.4746
Epoch 71/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9737 - val_loss: 1.5554
Epoch 72/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9728 - val_loss: 1.5023
Epoch 73/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0142 - val_loss: 1.5233
Epoch 74/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9267 - val_loss: 1.4812
Epoch 75/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9398 - val_loss: 1.4838
Epoch 76/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9100 - val_loss: 1.4992
Epoch 77/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8853 - val_loss: 1.4566
Epoch 78/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9416 - val_loss: 1.4769
Epoch 79/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 0.8910 - val_loss: 1.5021
Epoch 80/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9447 - val_loss: 1.4970
Epoch 81/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9396 - val_loss: 1.5415
Epoch 82/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9037 - val_loss: 1.4931
Epoch 83/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8417 - val_loss: 1.4645
Epoch 84/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9084 - val_loss: 1.4984
Epoch 85/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8796 - val_loss: 1.4698
Epoch 86/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8927 - val_loss: 1.4486
Epoch 87/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9113 - val_loss: 1.5172
Epoch 88/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8731 - val_loss: 1.5109
Epoch 89/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8907 - val_loss: 1.4753
Epoch 90/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8842 - val_loss: 1.4480
Epoch 91/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8521 - val_loss: 1.4783
Epoch 92/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9067 - val_loss: 1.4194
Epoch 93/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8771 - val_loss: 1.4823
Epoch 94/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.8827 - val_loss: 1.4256
Epoch 95/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8614 - val_loss: 1.4351
Epoch 96/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.8507 - val_loss: 1.5070
Epoch 97/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8448 - val_loss: 1.4578
Epoch 98/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8651 - val_loss: 1.5128
Epoch 99/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8209 - val_loss: 1.4792
Epoch 100/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8280 - val_loss: 1.4589
In [236]:
# Baseline predictions
baseline_preds = model3.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model3.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model3.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 3: Overnights (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
34/34 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step

Model Evaluation¶

In [238]:
## Model evalauation
# Predict and flatten
train_preds = model3.predict([X_num_train, X_cat_train]).flatten()
test_preds = model3.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse3 = np.sqrt(train_mse)
test_rmse3 = np.sqrt(test_mse)

#MAE
train_mae3 = mean_absolute_error(y_train, train_preds)
test_mae3 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape3 = mean_absolute_percentage_error(y_train, train_preds)
test_mape3 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_3 = r2_score(y_train, train_preds)
test_r2_3 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse3:.4f}, MAE: {train_mae3:.4f}, MAPE: {train_mape3:.4f}, R²: {train_r2_3:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse3:.4f}, MAE: {test_mae3:.4f}, MAPE: {test_mape3:.4f}, R²: {test_r2_3:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
138/138 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
Train MSE: 0.0999, RMSE: 0.3161, MAE: 0.2388, MAPE: 0.0260, R²: 0.9798
Test MSE: 0.3100, RMSE: 0.5568, MAE: 0.4334, MAPE: 0.0432, R²: 0.9311
In [ ]:
y_pred_all = model3.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights (2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
207/207 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step
In [240]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Overnights per Country (2024)', fontsize=16, y=1.02)
plt.show()
In [241]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE          RMSE           MAE  \
0                  Austria  1.028618e+11  3.207207e+05  2.174073e+05   
1                  Belgium  2.086010e+09  4.567286e+04  2.272012e+04   
2   Bosnia and Herzegovina  8.145445e+09  9.025212e+04  4.634687e+04   
3                   Canada  1.774758e+08  1.332201e+04  8.498550e+03   
4           Czech Republic  6.500328e+10  2.549574e+05  1.214085e+05   
5                  Denmark  3.653635e+09  6.044530e+04  2.052040e+04   
6                  Finland  2.066050e+08  1.437376e+04  8.788207e+03   
7                   France  3.886561e+09  6.234229e+04  3.070490e+04   
8                  Germany  4.779323e+12  2.186166e+06  1.200339e+06   
9                  Hungary  1.688156e+10  1.299290e+05  6.355589e+04   
10                 Ireland  4.933720e+08  2.221198e+04  1.300958e+04   
11                   Italy  2.409223e+10  1.552167e+05  7.373700e+04   
12             Netherlands  2.083571e+10  1.443458e+05  6.193622e+04   
13                  Norway  1.445879e+09  3.802472e+04  1.362845e+04   
14                  Poland  1.666509e+11  4.082290e+05  2.289431e+05   
15                 Romania  5.463797e+08  2.337477e+04  1.230082e+04   
16                Slovakia  3.082129e+09  5.551692e+04  2.890555e+04   
17                Slovenia  1.659923e+11  4.074216e+05  2.047528e+05   
18                   Spain  1.214496e+08  1.102042e+04  6.626548e+03   
19                  Sweden  1.519215e+09  3.897710e+04  1.818992e+04   
20             Switzerland  4.152904e+09  6.444303e+04  3.543848e+04   
21                     USA  7.544876e+09  8.686125e+04  6.149310e+04   
22          United Kingdom  4.076366e+10  2.019001e+05  1.295231e+05   

        MAPE        R2  
0   0.576117  0.776627  
1   0.632651  0.806398  
2   0.370813  0.745085  
3   0.440671  0.789588  
4   0.283152  0.848905  
5   0.366774  0.822101  
6   0.643731  0.716840  
7   0.298985  0.898379  
8   0.478053  0.160870  
9   0.358179  0.900508  
10  0.541236  0.686613  
11  0.309986  0.894494  
12  0.296124  0.856792  
13  0.561548  0.729123  
14  0.392675  0.731027  
15  0.339013  0.847686  
16  0.244617  0.982166  
17  0.475613  0.761378  
18  0.302552  0.942903  
19  0.326946  0.878634  
20  0.300017  0.717833  
21  0.436030  0.403028  
22  0.678253  0.495580  

Overnights 2019 LR=0.001 Model 4¶

In [324]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [325]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['overnights_next_month'] = cool.groupby('country')['overnights'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

cool
# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['date'] <= '2019-12-01']
cool = cool[cool['overnights_next_month'].notna()].copy()
In [326]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['overnights_next_month'].values
In [327]:
# Time-based split
train_end = pd.Timestamp("2014-12-31")
val_end = pd.Timestamp("2017-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [328]:
# Define model4
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model4 = Model(inputs=[input_numeric, input_country], outputs=output)
model4.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model4.summary()

# Train mode4
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model4.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_36"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_36        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_36          │ (None, 10)        │          0 │ embedding_36[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_36      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_36[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_108 (Dense)   │ (None, 64)        │      2,304 │ concatenate_36[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_108[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_72          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_109 (Dense)   │ (None, 32)        │      2,080 │ dropout_72[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_109[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_73          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_110 (Dense)   │ (None, 1)         │         33 │ dropout_73[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 12s 8ms/step - loss: 90.7974 - val_loss: 46.7961
Epoch 2/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 33.5081 - val_loss: 3.1799
Epoch 3/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 5.2712 - val_loss: 0.7961
Epoch 4/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 3.8846 - val_loss: 0.5789
Epoch 5/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 3.2203 - val_loss: 0.5077
Epoch 6/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.9854 - val_loss: 0.5004
Epoch 7/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 2.8682 - val_loss: 0.4250
Epoch 8/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.5530 - val_loss: 0.4092
Epoch 9/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.3035 - val_loss: 0.4177
Epoch 10/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.0789 - val_loss: 0.3989
Epoch 11/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.9518 - val_loss: 0.3952
Epoch 12/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.9176 - val_loss: 0.3784
Epoch 13/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.7907 - val_loss: 0.4061
Epoch 14/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.6883 - val_loss: 0.3758
Epoch 15/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.6661 - val_loss: 0.3739
Epoch 16/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.6102 - val_loss: 0.3784
Epoch 17/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.5431 - val_loss: 0.3768
Epoch 18/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.5284 - val_loss: 0.4029
Epoch 19/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.5092 - val_loss: 0.3586
Epoch 20/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3832 - val_loss: 0.3747
Epoch 21/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4667 - val_loss: 0.3209
Epoch 22/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4723 - val_loss: 0.3727
Epoch 23/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.3939 - val_loss: 0.4069
Epoch 24/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.3451 - val_loss: 0.4176
Epoch 25/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 15ms/step - loss: 1.3533 - val_loss: 0.3674
Epoch 26/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3464 - val_loss: 0.3010
Epoch 27/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3440 - val_loss: 0.3922
Epoch 28/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2810 - val_loss: 0.3386
Epoch 29/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2191 - val_loss: 0.3492
Epoch 30/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2700 - val_loss: 0.3981
Epoch 31/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3477 - val_loss: 0.3905
Epoch 32/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3152 - val_loss: 0.3948
Epoch 33/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2744 - val_loss: 0.3904
Epoch 34/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2857 - val_loss: 0.3538
Epoch 35/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.2550 - val_loss: 0.3604
Epoch 36/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2832 - val_loss: 0.3851
Epoch 37/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 1.2083 - val_loss: 0.3713
Epoch 38/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2435 - val_loss: 0.3195
Epoch 39/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2570 - val_loss: 0.3591
Epoch 40/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2140 - val_loss: 0.3145
Epoch 41/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.3116 - val_loss: 0.3504
Epoch 42/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.2215 - val_loss: 0.3300
Epoch 43/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2311 - val_loss: 0.2900
Epoch 44/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.2804 - val_loss: 0.3123
Epoch 45/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2416 - val_loss: 0.2690
Epoch 46/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.2357 - val_loss: 0.3585
Epoch 47/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2360 - val_loss: 0.3480
Epoch 48/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 1.1846 - val_loss: 0.2819
Epoch 49/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2401 - val_loss: 0.3116
Epoch 50/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.2199 - val_loss: 0.3275
Epoch 51/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1878 - val_loss: 0.3304
Epoch 52/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2340 - val_loss: 0.2855
Epoch 53/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.2356 - val_loss: 0.3079
Epoch 54/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1982 - val_loss: 0.3781
Epoch 55/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1904 - val_loss: 0.3046
Epoch 56/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1438 - val_loss: 0.3088
Epoch 57/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1583 - val_loss: 0.3761
Epoch 58/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1836 - val_loss: 0.3130
Epoch 59/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1245 - val_loss: 0.3135
Epoch 60/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1463 - val_loss: 0.3537
Epoch 61/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1222 - val_loss: 0.3582
Epoch 62/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1204 - val_loss: 0.3066
Epoch 63/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0663 - val_loss: 0.2981
Epoch 64/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1196 - val_loss: 0.3340
Epoch 65/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0988 - val_loss: 0.2898
Epoch 66/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1041 - val_loss: 0.3006
Epoch 67/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0544 - val_loss: 0.2970
Epoch 68/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1703 - val_loss: 0.2498
Epoch 69/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0619 - val_loss: 0.2769
Epoch 70/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1206 - val_loss: 0.2586
Epoch 71/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1017 - val_loss: 0.2959
Epoch 72/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1011 - val_loss: 0.2986
Epoch 73/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1399 - val_loss: 0.2408
Epoch 74/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0888 - val_loss: 0.3087
Epoch 75/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1052 - val_loss: 0.2366
Epoch 76/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0449 - val_loss: 0.3115
Epoch 77/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1046 - val_loss: 0.2994
Epoch 78/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0914 - val_loss: 0.3066
Epoch 79/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0305 - val_loss: 0.2944
Epoch 80/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0550 - val_loss: 0.2969
Epoch 81/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0954 - val_loss: 0.3187
Epoch 82/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1236 - val_loss: 0.2890
Epoch 83/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0461 - val_loss: 0.2956
Epoch 84/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0617 - val_loss: 0.2678
Epoch 85/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0369 - val_loss: 0.2509
Epoch 86/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0528 - val_loss: 0.3236
Epoch 87/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0568 - val_loss: 0.2442
Epoch 88/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0549 - val_loss: 0.2777
Epoch 89/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0523 - val_loss: 0.2935
Epoch 90/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0278 - val_loss: 0.2798
Epoch 91/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9610 - val_loss: 0.3066
Epoch 92/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0297 - val_loss: 0.2243
Epoch 93/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0141 - val_loss: 0.2905
Epoch 94/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0028 - val_loss: 0.3072
Epoch 95/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0506 - val_loss: 0.2882
Epoch 96/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0073 - val_loss: 0.2186
Epoch 97/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0086 - val_loss: 0.3141
Epoch 98/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9936 - val_loss: 0.2790
Epoch 99/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9645 - val_loss: 0.3335
Epoch 100/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0158 - val_loss: 0.2461
In [329]:
# Baseline predictions
baseline_preds = model4.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model4.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model4.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 3: Overnights (2019)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 18ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step

Model Evaluation¶

In [330]:
## Model evalauation
# Predict and flatten
train_preds = model4.predict([X_num_train, X_cat_train]).flatten()
test_preds = model4.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse4 = np.sqrt(train_mse)
test_rmse4 = np.sqrt(test_mse)

#MAE
train_mae4 = mean_absolute_error(y_train, train_preds)
test_mae4 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape4 = mean_absolute_percentage_error(y_train, train_preds)
test_mape4 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_4 = r2_score(y_train, train_preds)
test_r2_4 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse4:.4f}, MAE: {train_mae4:.4f}, MAPE: {train_mape4:.4f}, R²: {train_r2_4:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse4:.4f}, MAE: {test_mae4:.4f}, MAPE: {test_mape4:.4f}, R²: {test_r2_4:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
121/121 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
Train MSE: 0.1149, RMSE: 0.3390, MAE: 0.2549, MAPE: 0.0277, R²: 0.9768
Test MSE: 0.3801, RMSE: 0.6165, MAE: 0.4971, MAPE: 0.0493, R²: 0.9077
In [331]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model4.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights (2019)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
164/164 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step
In [332]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Overnights per Country (2019)', fontsize=16, y=1.02)
plt.show()
In [333]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE          RMSE           MAE  \
0                  Austria  9.030588e+10  3.005094e+05  1.864868e+05   
1                  Belgium  2.222734e+08  1.490884e+04  9.771725e+03   
2   Bosnia and Herzegovina  1.791651e+09  4.232790e+04  3.016650e+04   
3                   Canada  4.502302e+08  2.121863e+04  1.406842e+04   
4           Czech Republic  9.575052e+10  3.094358e+05  1.572649e+05   
5                  Denmark  1.668924e+09  4.085246e+04  2.036179e+04   
6                  Finland  8.806166e+08  2.967519e+04  2.118418e+04   
7                   France  5.216737e+09  7.222698e+04  3.979571e+04   
8                  Germany  3.592724e+12  1.895448e+06  1.056744e+06   
9                  Hungary  1.180876e+10  1.086681e+05  5.517401e+04   
10                 Ireland  5.457368e+08  2.336101e+04  1.457357e+04   
11                   Italy  1.764677e+11  4.200806e+05  1.610925e+05   
12             Netherlands  4.946827e+09  7.033368e+04  3.836726e+04   
13                  Norway  1.175707e+09  3.428858e+04  1.777885e+04   
14                  Poland  1.563322e+11  3.953887e+05  1.973553e+05   
15                 Romania  8.443926e+08  2.905843e+04  1.467024e+04   
16                Slovakia  5.826269e+09  7.633000e+04  3.326159e+04   
17                Slovenia  2.389104e+11  4.887847e+05  2.336568e+05   
18                   Spain  7.090350e+08  2.662771e+04  1.438300e+04   
19                  Sweden  1.700015e+09  4.123123e+04  2.305828e+04   
20             Switzerland  2.423808e+09  4.923219e+04  2.420284e+04   
21                     USA  7.054939e+09  8.399368e+04  6.491786e+04   
22          United Kingdom  5.882305e+10  2.425346e+05  1.640790e+05   

        MAPE        R2  
0   0.341512  0.793687  
1   0.349829  0.982941  
2   0.343102  0.951547  
3   0.393229  0.633062  
4   0.321398  0.778906  
5   0.609540  0.932945  
6   0.547492  0.515157  
7   0.237299  0.906078  
8   0.382247  0.148015  
9   0.358141  0.928578  
10  0.386605  0.666496  
11  0.270358  0.680285  
12  0.305097  0.965537  
13  0.326562  0.937865  
14  0.372516  0.721547  
15  0.331823  0.700616  
16  0.295391  0.967036  
17  0.409152  0.673543  
18  0.355399  0.829893  
19  0.396644  0.938800  
20  0.269205  0.814150  
21  0.510810  0.336241  
22  0.397067  0.557544  

Arrivals 2024 LR = 0.001 Model 2¶

In [ ]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [ ]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['arrivals_next_month'].notna()].copy()
In [ ]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['arrivals_next_month'].values
In [ ]:
cool
country arrivals overnights date year month month_sin month_cos eu_member euro_adopted ... overnights_lag_1 overnights_lag_3 overnights_lag_6 overnights_lag_12 cpi_lag_1 unemployment_rate_lag_3 google_trends_lag_1 google_trends_lag_3 schengen_member country_encoded
12 Austria 7.873217 9.102867 2001-01-01 2001 1 0.500000 8.660254e-01 1 1 ... 9.624303 11.286840 13.687496 9.077951 4.439116 3.6 -1.0 -1.0 1 0
13 Austria 8.664923 9.856815 2001-02-01 2001 2 0.866025 5.000000e-01 1 1 ... 9.102867 9.441055 13.771151 9.606159 4.440296 3.5 -1.0 -1.0 1 0
14 Austria 9.258368 10.391976 2001-03-01 2001 3 1.000000 6.120000e-17 1 1 ... 9.856815 9.624303 13.051318 10.189080 4.443827 3.8 -1.0 -1.0 1 0
15 Austria 10.645711 11.952940 2001-04-01 2001 4 0.866025 -5.000000e-01 1 1 ... 10.391976 9.102867 11.286840 11.828816 4.445001 4.2 -1.0 -1.0 1 0
16 Austria 11.169632 12.518896 2001-05-01 2001 5 0.500000 -8.660254e-01 1 1 ... 11.952940 9.856815 9.441055 12.243176 4.450853 4.5 -1.0 -1.0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6892 United Kingdom 11.936578 13.523750 2024-07-01 2024 7 -0.500000 -8.660254e-01 0 0 ... 13.349341 11.997115 8.512783 13.465556 5.016617 4.3 47.0 35.0 0 22
6893 United Kingdom 12.015293 13.678767 2024-08-01 2024 8 -0.866025 -5.000000e-01 0 0 ... 13.523750 12.908799 9.019059 13.632791 5.014627 4.1 47.0 32.0 0 22
6894 United Kingdom 11.742989 13.317415 2024-09-01 2024 9 -1.000000 -1.840000e-16 0 0 ... 13.678767 13.349341 9.886138 13.254668 5.018603 4.2 41.0 47.0 0 22
6895 United Kingdom 10.992100 12.530613 2024-10-01 2024 10 -0.866025 5.000000e-01 0 0 ... 13.317415 13.523750 11.997115 12.325113 5.017942 4.2 30.0 47.0 0 22
6896 United Kingdom 8.841593 9.982068 2024-11-01 2024 11 -0.500000 8.660254e-01 0 0 ... 12.530613 13.678767 12.908799 9.631482 5.023881 4.4 19.0 41.0 0 22

6599 rows × 42 columns

In [ ]:
cool.isna().sum()
country                      0
arrivals                     0
overnights                   0
date                         0
year                         0
month                        0
month_sin                    0
month_cos                    0
eu_member                    0
euro_adopted                 0
unemployment_rate            0
exchange_rate                0
industry_production        363
cpi                          0
google_trends                0
arrivals_next_month          0
month_April                  0
month_August                 0
month_December               0
month_February               0
month_January                0
month_July                   0
month_June                   0
month_March                  0
month_May                    0
month_November               0
month_October                0
month_September              0
arrivals_lag_1               0
arrivals_lag_3               0
arrivals_lag_6               0
arrivals_lag_12              0
overnights_lag_1             0
overnights_lag_3             0
overnights_lag_6             0
overnights_lag_12            0
cpi_lag_1                    0
unemployment_rate_lag_3      0
google_trends_lag_1          0
google_trends_lag_3          0
schengen_member              0
country_encoded              0
dtype: int64
In [ ]:
# Time-based split
train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

# Define model2
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model2 = Model(inputs=[input_numeric, input_country], outputs=output)
model2.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model2.summary()

# Train model2
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model2.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_37"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_37        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_37          │ (None, 10)        │          0 │ embedding_37[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_37      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_37[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_111 (Dense)   │ (None, 64)        │      2,304 │ concatenate_37[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_111[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_74          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_112 (Dense)   │ (None, 32)        │      2,080 │ dropout_74[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_112[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_75          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_113 (Dense)   │ (None, 1)         │         33 │ dropout_75[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 67.6397 - val_loss: 24.2574
Epoch 2/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 16.7866 - val_loss: 2.7601
Epoch 3/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 3.6225 - val_loss: 2.3808
Epoch 4/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - loss: 2.9429 - val_loss: 2.4379
Epoch 5/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - loss: 2.4179 - val_loss: 2.2861
Epoch 6/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 12ms/step - loss: 2.2089 - val_loss: 2.4861
Epoch 7/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.8824 - val_loss: 2.4279
Epoch 8/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.7382 - val_loss: 2.4234
Epoch 9/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 7s 24ms/step - loss: 1.6110 - val_loss: 2.4041
Epoch 10/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 6s 7ms/step - loss: 1.3417 - val_loss: 2.2658
Epoch 11/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.2643 - val_loss: 2.2970
Epoch 12/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 11ms/step - loss: 1.2401 - val_loss: 2.3647
Epoch 13/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 8s 18ms/step - loss: 1.1678 - val_loss: 2.3226
Epoch 14/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 1.1266 - val_loss: 2.3435
Epoch 15/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1508 - val_loss: 2.2937
Epoch 16/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1442 - val_loss: 2.3006
Epoch 17/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0613 - val_loss: 2.3334
Epoch 18/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0120 - val_loss: 2.3491
Epoch 19/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0003 - val_loss: 2.3213
Epoch 20/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9837 - val_loss: 2.3538
Epoch 21/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9836 - val_loss: 2.3499
Epoch 22/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9727 - val_loss: 2.3663
Epoch 23/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9948 - val_loss: 2.3869
Epoch 24/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9877 - val_loss: 2.3571
Epoch 25/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9401 - val_loss: 2.3121
Epoch 26/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9512 - val_loss: 2.4112
Epoch 27/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9802 - val_loss: 2.3515
Epoch 28/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9385 - val_loss: 2.4328
Epoch 29/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9237 - val_loss: 2.3702
Epoch 30/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9208 - val_loss: 2.4257
Epoch 31/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9504 - val_loss: 2.4614
Epoch 32/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.9075 - val_loss: 2.3074
Epoch 33/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8661 - val_loss: 2.2357
Epoch 34/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8901 - val_loss: 2.3487
Epoch 35/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 17ms/step - loss: 0.8557 - val_loss: 2.4129
Epoch 36/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step - loss: 0.8772 - val_loss: 2.3710
Epoch 37/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8398 - val_loss: 2.4213
Epoch 38/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8535 - val_loss: 2.3572
Epoch 39/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8696 - val_loss: 2.3496
Epoch 40/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8352 - val_loss: 2.2743
Epoch 41/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8381 - val_loss: 2.4121
Epoch 42/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8806 - val_loss: 2.2995
Epoch 43/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8205 - val_loss: 2.3153
Epoch 44/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8615 - val_loss: 2.3236
Epoch 45/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8250 - val_loss: 2.2614
Epoch 46/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8223 - val_loss: 2.3344
Epoch 47/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8433 - val_loss: 2.3264
Epoch 48/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8229 - val_loss: 2.2402
Epoch 49/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7742 - val_loss: 2.3089
Epoch 50/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7955 - val_loss: 2.2734
Epoch 51/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7981 - val_loss: 2.2787
Epoch 52/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7627 - val_loss: 2.2760
Epoch 53/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 0.8173 - val_loss: 2.3125
Epoch 54/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8277 - val_loss: 2.2606
Epoch 55/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.7665 - val_loss: 2.3070
Epoch 56/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7709 - val_loss: 2.2386
Epoch 57/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - loss: 0.7876 - val_loss: 2.3755
Epoch 58/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7985 - val_loss: 2.2257
Epoch 59/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7697 - val_loss: 2.3401
Epoch 60/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7739 - val_loss: 2.2278
Epoch 61/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7652 - val_loss: 2.3139
Epoch 62/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7971 - val_loss: 2.2271
Epoch 63/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7472 - val_loss: 2.2477
Epoch 64/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7693 - val_loss: 2.2629
Epoch 65/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 0.7221 - val_loss: 2.2515
Epoch 66/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 7ms/step - loss: 0.7554 - val_loss: 2.2968
Epoch 67/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7474 - val_loss: 2.2501
Epoch 68/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7336 - val_loss: 2.1876
Epoch 69/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6973 - val_loss: 2.2026
Epoch 70/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6985 - val_loss: 2.2146
Epoch 71/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - loss: 0.7471 - val_loss: 2.2008
Epoch 72/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7348 - val_loss: 2.2450
Epoch 73/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7008 - val_loss: 2.1541
Epoch 74/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7303 - val_loss: 2.1802
Epoch 75/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.7044 - val_loss: 2.2246
Epoch 76/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.7006 - val_loss: 2.2524
Epoch 77/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7326 - val_loss: 2.2514
Epoch 78/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6666 - val_loss: 2.2973
Epoch 79/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6815 - val_loss: 2.1639
Epoch 80/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6999 - val_loss: 2.2235
Epoch 81/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.6720 - val_loss: 2.2327
Epoch 82/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7127 - val_loss: 2.2349
Epoch 83/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6997 - val_loss: 2.2009
Epoch 84/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6761 - val_loss: 2.1925
Epoch 85/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6760 - val_loss: 2.2075
Epoch 86/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6772 - val_loss: 2.2640
Epoch 87/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6492 - val_loss: 2.2148
Epoch 88/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6594 - val_loss: 2.2313
Epoch 89/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.6871 - val_loss: 2.1818
Epoch 90/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6710 - val_loss: 2.1486
Epoch 91/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6750 - val_loss: 2.2075
Epoch 92/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.6639 - val_loss: 2.1997
Epoch 93/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6333 - val_loss: 2.1981
Epoch 94/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.6760 - val_loss: 2.1814
Epoch 95/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6528 - val_loss: 2.2041
Epoch 96/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6243 - val_loss: 2.2046
Epoch 97/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6439 - val_loss: 2.2165
Epoch 98/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6469 - val_loss: 2.1735
Epoch 99/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6199 - val_loss: 2.1718
Epoch 100/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6182 - val_loss: 2.2016
In [ ]:
# Baseline predictions
baseline_preds = model2.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model2.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model2.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 2: Arrivals (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step

Model Evaluation¶

In [ ]:
## Model evalauation
# Predict and flatten
train_preds = model2.predict([X_num_train, X_cat_train]).flatten()
test_preds = model2.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse2 = np.sqrt(train_mse)
test_rmse2 = np.sqrt(test_mse)

#MAE
train_mae2 = mean_absolute_error(y_train, train_preds)
test_mae2 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape2 = mean_absolute_percentage_error(y_train, train_preds)
test_mape2 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_2 = r2_score(y_train, train_preds)
test_r2_2 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse2:.4f}, MAE: {train_mae2:.4f}, MAPE: {train_mape2:.4f}, R²: {train_r2_2:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse2:.4f}, MAE: {test_mae2:.4f}, MAPE: {test_mape2:.4f}, R²: {test_r2_2:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
138/138 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
Train MSE: 0.0749, RMSE: 0.2738, MAE: 0.2059, MAPE: 0.0257, R²: 0.9807
Test MSE: 0.4215, RMSE: 0.6492, MAE: 0.4911, MAPE: 0.0579, R²: 0.8919
In [ ]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model2.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals (2024)")
plt.xlabel("Date")
plt.ylabel("Total Arrivals")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
207/207 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step
In [ ]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals per Country (2024)', fontsize=16, y=1.02)
plt.show()
In [ ]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE           RMSE            MAE  \
0                  Austria  5.081636e+09   71285.595256   49918.328535   
1                  Belgium  6.537096e+07    8085.231123    4196.487165   
2   Bosnia and Herzegovina  1.642307e+08   12815.252438    8874.936591   
3                   Canada  1.947356e+07    4412.885448    2575.362746   
4           Czech Republic  1.393131e+09   37324.668759   19292.966149   
5                  Denmark  1.315828e+08   11470.952964    4718.637211   
6                  Finland  1.638380e+07    4047.691017    2332.584180   
7                   France  4.337233e+08   20826.025097   12390.277720   
8                  Germany  7.127464e+10  266973.101306  161100.764622   
9                  Hungary  1.225432e+09   35006.166906   18925.231006   
10                 Ireland  3.426333e+07    5853.489093    3640.333267   
11                   Italy  1.647708e+09   40591.974631   20378.707260   
12             Netherlands  6.173665e+08   24846.861121   12934.523989   
13                  Norway  6.668156e+07    8165.878025    3941.330346   
14                  Poland  3.940971e+09   62777.156710   35559.869517   
15                 Romania  3.460065e+07    5882.232142    3181.175834   
16                Slovakia  1.441173e+08   12004.884555    6607.882405   
17                Slovenia  2.817913e+09   53084.015108   37657.455940   
18                   Spain  5.689977e+07    7543.193539    4248.567497   
19                  Sweden  5.434318e+07    7371.782476    3788.066846   
20             Switzerland  2.199884e+08   14832.006118    8522.123304   
21                     USA  3.815869e+08   19534.250033   13980.165553   
22          United Kingdom  8.179276e+08   28599.433484   17370.251709   

        MAPE        R2  
0   0.574757  0.617748  
1   0.581596  0.851310  
2   0.408353  0.854232  
3   0.614233  0.829186  
4   0.311453  0.848878  
5   0.522899  0.700217  
6   0.762202  0.516292  
7   0.414942  0.834006  
8   0.527543  0.221635  
9   0.459027  0.787361  
10  0.653992  0.523599  
11  0.382417  0.811186  
12  0.397557  0.803672  
13  0.792227  0.571320  
14  0.477038  0.710003  
15  0.390700  0.815610  
16  0.313489  0.961850  
17  0.549796  0.772319  
18  0.401895  0.862284  
19  0.472303  0.882823  
20  0.345093  0.620763  
21  0.455143  0.801948  
22  0.743835  0.738716  

Arrivals 2019 LR=0.001 Model 1¶

In [266]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [267]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['date'] <= '2019-12-01']

cool = cool[cool['arrivals_next_month'].notna()].copy()
In [268]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['arrivals_next_month'].values
In [269]:
# Time-based split
train_end = pd.Timestamp("2014-12-31")
val_end = pd.Timestamp("2017-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [270]:
# Define model4
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model1 = Model(inputs=[input_numeric, input_country], outputs=output)
model1.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model1.summary()

# Train mode4
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model1.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_30"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_30        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_30          │ (None, 10)        │          0 │ embedding_30[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_30      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_30[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_90 (Dense)    │ (None, 64)        │      2,304 │ concatenate_30[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_90[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_60          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_91 (Dense)    │ (None, 32)        │      2,080 │ dropout_60[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_91[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_61          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_92 (Dense)    │ (None, 1)         │         33 │ dropout_61[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 9s 8ms/step - loss: 67.7775 - val_loss: 31.6445
Epoch 2/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 22.3988 - val_loss: 1.5322
Epoch 3/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 3.8682 - val_loss: 0.7337
Epoch 4/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 3.3002 - val_loss: 0.5032
Epoch 5/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.6980 - val_loss: 0.4413
Epoch 6/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 2.3971 - val_loss: 0.3308
Epoch 7/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.0858 - val_loss: 0.3201
Epoch 8/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.9076 - val_loss: 0.3460
Epoch 9/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.8606 - val_loss: 0.3169
Epoch 10/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.6263 - val_loss: 0.3324
Epoch 11/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.5082 - val_loss: 0.2692
Epoch 12/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3668 - val_loss: 0.2613
Epoch 13/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4066 - val_loss: 0.3108
Epoch 14/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2721 - val_loss: 0.2947
Epoch 15/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3073 - val_loss: 0.2788
Epoch 16/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.1615 - val_loss: 0.2917
Epoch 17/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2453 - val_loss: 0.2769
Epoch 18/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.1692 - val_loss: 0.2724
Epoch 19/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - loss: 1.1334 - val_loss: 0.3020
Epoch 20/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1748 - val_loss: 0.2422
Epoch 21/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.1109 - val_loss: 0.2975
Epoch 22/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0664 - val_loss: 0.3064
Epoch 23/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1109 - val_loss: 0.2958
Epoch 24/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0124 - val_loss: 0.2453
Epoch 25/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0531 - val_loss: 0.2697
Epoch 26/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0846 - val_loss: 0.2782
Epoch 27/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0858 - val_loss: 0.2751
Epoch 28/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.0140 - val_loss: 0.2530
Epoch 29/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0381 - val_loss: 0.2716
Epoch 30/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9978 - val_loss: 0.3025
Epoch 31/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0015 - val_loss: 0.2562
Epoch 32/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9500 - val_loss: 0.2932
Epoch 33/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9884 - val_loss: 0.2509
Epoch 34/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9633 - val_loss: 0.2902
Epoch 35/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9817 - val_loss: 0.3144
Epoch 36/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9323 - val_loss: 0.2615
Epoch 37/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9170 - val_loss: 0.2546
Epoch 38/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9498 - val_loss: 0.2824
Epoch 39/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9296 - val_loss: 0.2132
Epoch 40/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9895 - val_loss: 0.2419
Epoch 41/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 10ms/step - loss: 0.9238 - val_loss: 0.3262
Epoch 42/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9853 - val_loss: 0.2984
Epoch 43/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.9708 - val_loss: 0.2598
Epoch 44/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - loss: 0.9413 - val_loss: 0.2332
Epoch 45/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9508 - val_loss: 0.2466
Epoch 46/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8969 - val_loss: 0.2490
Epoch 47/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9293 - val_loss: 0.2760
Epoch 48/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9200 - val_loss: 0.2497
Epoch 49/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9380 - val_loss: 0.2382
Epoch 50/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9109 - val_loss: 0.2762
Epoch 51/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9188 - val_loss: 0.2745
Epoch 52/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9029 - val_loss: 0.2906
Epoch 53/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.9281 - val_loss: 0.2222
Epoch 54/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - loss: 0.8881 - val_loss: 0.2893
Epoch 55/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8973 - val_loss: 0.2331
Epoch 56/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8678 - val_loss: 0.2558
Epoch 57/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 0.8569 - val_loss: 0.2376
Epoch 58/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8497 - val_loss: 0.2230
Epoch 59/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8584 - val_loss: 0.2637
Epoch 60/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8452 - val_loss: 0.2468
Epoch 61/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8408 - val_loss: 0.2767
Epoch 62/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8325 - val_loss: 0.2766
Epoch 63/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8907 - val_loss: 0.3192
Epoch 64/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 10ms/step - loss: 0.8712 - val_loss: 0.1979
Epoch 65/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8256 - val_loss: 0.2884
Epoch 66/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8836 - val_loss: 0.2676
Epoch 67/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8484 - val_loss: 0.2426
Epoch 68/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 9ms/step - loss: 0.8475 - val_loss: 0.2432
Epoch 69/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8207 - val_loss: 0.2640
Epoch 70/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8451 - val_loss: 0.2363
Epoch 71/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7694 - val_loss: 0.2579
Epoch 72/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8216 - val_loss: 0.3550
Epoch 73/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8074 - val_loss: 0.2073
Epoch 74/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8457 - val_loss: 0.2140
Epoch 75/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7992 - val_loss: 0.2816
Epoch 76/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8139 - val_loss: 0.2271
Epoch 77/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7944 - val_loss: 0.2203
Epoch 78/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8483 - val_loss: 0.2180
Epoch 79/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 9ms/step - loss: 0.8018 - val_loss: 0.2443
Epoch 80/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8288 - val_loss: 0.2461
Epoch 81/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8137 - val_loss: 0.2634
Epoch 82/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7978 - val_loss: 0.2523
Epoch 83/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 0.7900 - val_loss: 0.2177
Epoch 84/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - loss: 0.7369 - val_loss: 0.2790
Epoch 85/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8106 - val_loss: 0.2041
Epoch 86/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7447 - val_loss: 0.2484
Epoch 87/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.7948 - val_loss: 0.2330
Epoch 88/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7684 - val_loss: 0.2200
Epoch 89/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7770 - val_loss: 0.2393
Epoch 90/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8086 - val_loss: 0.1829
Epoch 91/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7575 - val_loss: 0.1823
Epoch 92/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7621 - val_loss: 0.1903
Epoch 93/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7652 - val_loss: 0.2086
Epoch 94/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7786 - val_loss: 0.2223
Epoch 95/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7741 - val_loss: 0.2094
Epoch 96/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7545 - val_loss: 0.1820
Epoch 97/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7618 - val_loss: 0.2086
Epoch 98/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7097 - val_loss: 0.1998
Epoch 99/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7333 - val_loss: 0.2348
Epoch 100/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7541 - val_loss: 0.2034
In [271]:
# Baseline predictions
baseline_preds = model1.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model1.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model1.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)

else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 1: Arrivals (2019)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
18/18 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 

Model Evaluation¶

In [272]:
## Model evalauation
# Predict and flatten
train_preds = model1.predict([X_num_train, X_cat_train]).flatten()
test_preds = model1.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse1 = np.sqrt(train_mse)
test_rmse1 = np.sqrt(test_mse)

#MAE
train_mae1 = mean_absolute_error(y_train, train_preds)
test_mae1 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape1 = mean_absolute_percentage_error(y_train, train_preds)
test_mape1 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_1 = r2_score(y_train, train_preds)
test_r2_1 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse1:.4f}, MAE: {train_mae1:.4f}, MAPE: {train_mape1:.4f}, R²: {train_r2_1:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse1:.4f}, MAE: {test_mae1:.4f}, MAPE: {test_mape1:.4f}, R²: {test_r2_1:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
121/121 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
Train MSE: 0.0857, RMSE: 0.2927, MAE: 0.2219, MAPE: 0.0277, R²: 0.9779
Test MSE: 0.4103, RMSE: 0.6405, MAE: 0.4875, MAPE: 0.0546, R²: 0.8716
In [273]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model1.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals (2019)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
164/164 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step
In [274]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals per Country (2019)', fontsize=16, y=1.02)
plt.show()
In [275]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE           RMSE            MAE  \
0                  Austria  2.679614e+09   51764.985675   37030.900614   
1                  Belgium  2.492627e+07    4992.621074    3350.236781   
2   Bosnia and Herzegovina  1.842222e+08   13572.847560   10908.470133   
3                   Canada  7.779840e+07    8820.339876    5862.227914   
4           Czech Republic  5.728626e+08   23934.548834   13299.749715   
5                  Denmark  9.971241e+07    9985.609978    4399.104020   
6                  Finland  4.199998e+07    6480.738915    5031.262491   
7                   France  1.101853e+08   10496.917985    7294.615351   
8                  Germany  4.560198e+10  213546.207054  127152.686279   
9                  Hungary  6.498556e+08   25492.265914   13569.599833   
10                 Ireland  3.477323e+07    5896.883205    3741.162018   
11                   Italy  7.621585e+09   87301.691953   31303.574341   
12             Netherlands  1.198660e+08   10948.333395    7456.408488   
13                  Norway  3.310414e+07    5753.619902    3318.054125   
14                  Poland  2.317188e+09   48137.174735   28348.363419   
15                 Romania  2.196468e+07    4686.648987    2272.309680   
16                Slovakia  2.515013e+07    5014.990083    2605.888275   
17                Slovenia  3.576730e+09   59805.772720   41043.037821   
18                   Spain  2.336578e+07    4833.815971    3305.533460   
19                  Sweden  4.620571e+07    6797.478266    4890.971967   
20             Switzerland  7.623722e+07    8731.392972    6250.464722   
21                     USA  8.693620e+08   29484.944652   23078.987376   
22          United Kingdom  1.602999e+09   40037.469582   28814.113729   

        MAPE        R2  
0   0.317875  0.766548  
1   0.327174  0.943330  
2   0.368343  0.841933  
3   0.354829  0.593108  
4   0.285757  0.931426  
5   0.454653  0.783175  
6   0.584503  0.383765  
7   0.182306  0.967303  
8   0.360820  0.267050  
9   0.315947  0.874721  
10  0.399846  0.507424  
11  0.202527  0.592406  
12  0.327610  0.955578  
13  0.363048  0.925196  
14  0.379036  0.787249  
15  0.254269  0.851325  
16  0.244951  0.993074  
17  0.405495  0.717762  
18  0.251376  0.974041  
19  0.429396  0.946978  
20  0.289273  0.852513  
21  0.473352  0.466255  
22  0.360491  0.654020  

Non-schengen¶

In [350]:
# Load data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
cool = cool[~cool['country'].isin(['Russian Federation'])]

# Type conversions
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)

# Add Schengen and Euro info
schengen_entry_year = {
    'Austria': 1995, 'Belgium': 1995, 'Czech Republic': 2007, 'Denmark': 2001,
    'Finland': 1996, 'France': 1995, 'Germany': 1995, 'Hungary': 2007,
    'Italy': 1997, 'Netherlands': 1995, 'Norway': 2001, 'Poland': 2007,
    'Portugal': 1995, 'Slovakia': 2007, 'Slovenia': 2007, 'Spain': 1995,
    'Sweden': 2001, 'Switzerland': 2008, 'Romania': 2024,
}

cool['schengen_member'] = cool.apply(lambda row: int(row['year'] >= schengen_entry_year.get(row['country'], np.inf)), axis=1).astype('int8')

# Add Google Trends
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Log transform + lags
cool = cool.sort_values(['country', 'date']).drop_duplicates(subset=['country', 'date']).reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals'])
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])

cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool.drop(columns=['month_name']), ohe_month], axis=1)

cool['arrivals_lag_1'] = cool.groupby('country')['arrivals'].shift(1)
cool['arrivals_lag_3'] = cool.groupby('country')['arrivals'].shift(3)
cool['arrivals_lag_6'] = cool.groupby('country')['arrivals'].shift(6)
cool['arrivals_lag_12'] = cool.groupby('country')['arrivals'].shift(12)
cool['cpi_lag_1'] = cool.groupby('country')['cpi'].shift(1)
cool['unemp_rate_lag_3'] = cool.groupby('country')['unemployment_rate'].shift(3)
cool['google_trends_lag_1'] = cool.groupby('country')['google_trends'].shift(1)
cool['google_trends_lag_3'] = cool.groupby('country')['google_trends'].shift(3)

cool = cool[cool['date'] >= '2001-01-01']
cool_ns = cool[cool['country'].isin(['Ireland','Bosnia and Herzegovina', 'USA', 'United Kingdom', 'Canada'])].copy()
cool_ns = cool_ns[cool_ns['arrivals_next_month'].notna()].copy()

cool_ns['country_encoded'] = LabelEncoder().fit_transform(cool_ns['country'])
month_cols = [col for col in cool_ns.columns if col.startswith('month_')]

X_numeric = cool_ns[[
    'unemp_rate_lag_3', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted'
] + month_cols].values

X_country_array = cool_ns['country_encoded'].astype('int32').values
y = cool_ns['arrivals_next_month'].values

train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool_ns['date'] <= train_end
val_mask = (cool_ns['date'] > train_end) & (cool_ns['date'] <= val_end)
test_mask = cool_ns['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
embedding = Embedding(input_dim=len(np.unique(X_country_array)), output_dim=5)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)

model0 = Model(inputs=[input_numeric, input_country], outputs=output)
model0.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
history = model0.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16)

model0.summary()
Epoch 1/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 19s 21ms/step - loss: 70.2320 - val_loss: 67.7599
Epoch 2/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 62.0043 - val_loss: 57.4380
Epoch 3/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - loss: 52.5921 - val_loss: 44.6008
Epoch 4/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 41.1542 - val_loss: 29.7536
Epoch 5/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 37ms/step - loss: 27.9099 - val_loss: 17.7265
Epoch 6/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 4s 38ms/step - loss: 16.7578 - val_loss: 10.7735
Epoch 7/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 8.8176 - val_loss: 4.5991
Epoch 8/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 18ms/step - loss: 4.9372 - val_loss: 2.4491
Epoch 9/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - loss: 3.5004 - val_loss: 2.2870
Epoch 10/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 3.5081 - val_loss: 2.2256
Epoch 11/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 8s 103ms/step - loss: 2.7785 - val_loss: 2.1660
Epoch 12/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 3s 23ms/step - loss: 2.7310 - val_loss: 2.1169
Epoch 13/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 2.6056 - val_loss: 2.0984
Epoch 14/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 3.0444 - val_loss: 2.1610
Epoch 15/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 20ms/step - loss: 2.6913 - val_loss: 2.1628
Epoch 16/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 2.2879 - val_loss: 1.9532
Epoch 17/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 2.5391 - val_loss: 2.0436
Epoch 18/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.9582 - val_loss: 2.0355
Epoch 19/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.9699 - val_loss: 2.0712
Epoch 20/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.8013 - val_loss: 2.0091
Epoch 21/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 3s 43ms/step - loss: 1.8588 - val_loss: 1.9895
Epoch 22/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 4s 24ms/step - loss: 2.0070 - val_loss: 1.8358
Epoch 23/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.8016 - val_loss: 1.9515
Epoch 24/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.9085 - val_loss: 2.0363
Epoch 25/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.8029 - val_loss: 2.0660
Epoch 26/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.5171 - val_loss: 2.0622
Epoch 27/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 27ms/step - loss: 1.6722 - val_loss: 2.0327
Epoch 28/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 11ms/step - loss: 1.5666 - val_loss: 1.9213
Epoch 29/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.5632 - val_loss: 2.0125
Epoch 30/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.6246 - val_loss: 1.8871
Epoch 31/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4207 - val_loss: 1.9351
Epoch 32/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3482 - val_loss: 1.9312
Epoch 33/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.4414 - val_loss: 1.8715
Epoch 34/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1816 - val_loss: 1.9154
Epoch 35/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.3311 - val_loss: 1.9495
Epoch 36/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.3297 - val_loss: 1.8732
Epoch 37/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3147 - val_loss: 1.8953
Epoch 38/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.3075 - val_loss: 1.9057
Epoch 39/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2924 - val_loss: 1.8423
Epoch 40/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2604 - val_loss: 1.8251
Epoch 41/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2987 - val_loss: 1.8850
Epoch 42/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2480 - val_loss: 1.8663
Epoch 43/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2543 - val_loss: 1.8764
Epoch 44/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1732 - val_loss: 1.8326
Epoch 45/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0759 - val_loss: 1.8918
Epoch 46/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0416 - val_loss: 1.8623
Epoch 47/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0965 - val_loss: 1.9172
Epoch 48/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0024 - val_loss: 1.9087
Epoch 49/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0457 - val_loss: 1.9199
Epoch 50/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8859 - val_loss: 1.8941
Epoch 51/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.9685 - val_loss: 1.8796
Epoch 52/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1287 - val_loss: 1.8473
Epoch 53/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.0485 - val_loss: 1.8785
Epoch 54/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.0335 - val_loss: 1.8847
Epoch 55/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.9415 - val_loss: 1.8722
Epoch 56/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.9807 - val_loss: 1.8616
Epoch 57/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.9180 - val_loss: 1.9059
Epoch 58/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 1.0627 - val_loss: 1.8404
Epoch 59/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8911 - val_loss: 1.9043
Epoch 60/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.9765 - val_loss: 1.8932
Epoch 61/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9974 - val_loss: 1.8229
Epoch 62/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0363 - val_loss: 1.8347
Epoch 63/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8997 - val_loss: 1.8874
Epoch 64/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8870 - val_loss: 1.8901
Epoch 65/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8145 - val_loss: 1.8895
Epoch 66/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8541 - val_loss: 1.9292
Epoch 67/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8720 - val_loss: 1.8794
Epoch 68/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 0.9244 - val_loss: 1.9101
Epoch 69/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8384 - val_loss: 1.9313
Epoch 70/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - loss: 0.8639 - val_loss: 1.9346
Epoch 71/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.9237 - val_loss: 1.9323
Epoch 72/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 22ms/step - loss: 0.8753 - val_loss: 1.9283
Epoch 73/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 0.8214 - val_loss: 1.9219
Epoch 74/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8923 - val_loss: 1.9176
Epoch 75/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.8144 - val_loss: 1.8879
Epoch 76/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8084 - val_loss: 1.8796
Epoch 77/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.7641 - val_loss: 1.8786
Epoch 78/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.8447 - val_loss: 1.8973
Epoch 79/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.7786 - val_loss: 1.9153
Epoch 80/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 0.8689 - val_loss: 1.8983
Epoch 81/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8436 - val_loss: 1.9101
Epoch 82/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8528 - val_loss: 1.9517
Epoch 83/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7476 - val_loss: 1.9288
Epoch 84/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7558 - val_loss: 1.8960
Epoch 85/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7617 - val_loss: 1.9375
Epoch 86/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8155 - val_loss: 1.9272
Epoch 87/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8460 - val_loss: 1.9310
Epoch 88/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7485 - val_loss: 1.9583
Epoch 89/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.7601 - val_loss: 1.9537
Epoch 90/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8471 - val_loss: 1.8919
Epoch 91/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.6934 - val_loss: 1.9042
Epoch 92/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7554 - val_loss: 1.9332
Epoch 93/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.7792 - val_loss: 1.9139
Epoch 94/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7224 - val_loss: 1.9408
Epoch 95/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7511 - val_loss: 1.9806
Epoch 96/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.6967 - val_loss: 1.9740
Epoch 97/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.7939 - val_loss: 1.9248
Epoch 98/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7514 - val_loss: 1.9794
Epoch 99/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.8539 - val_loss: 1.9747
Epoch 100/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.7982 - val_loss: 2.0086
Model: "functional_38"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_38        │ (None, 1, 5)      │         25 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_38          │ (None, 5)         │          0 │ embedding_38[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_38      │ (None, 30)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_38[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_114 (Dense)   │ (None, 64)        │      1,984 │ concatenate_38[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_114[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_76          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_115 (Dense)   │ (None, 32)        │      2,080 │ dropout_76[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_115[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_77          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_116 (Dense)   │ (None, 1)         │         33 │ dropout_77[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 13,136 (51.32 KB)
 Trainable params: 4,314 (16.85 KB)
 Non-trainable params: 192 (768.00 B)
 Optimizer params: 8,630 (33.71 KB)

Model Evaluation¶

In [351]:
## Model evalauation
# Predict and flatten
train_preds = model0.predict([X_num_train, X_cat_train]).flatten()
test_preds = model0.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse0 = np.sqrt(train_mse)
test_rmse0 = np.sqrt(test_mse)

#MAE
train_mae0 = mean_absolute_error(y_train, train_preds)
test_mae0 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape0 = mean_absolute_percentage_error(y_train, train_preds)
test_mape0 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_0 = r2_score(y_train, train_preds)
test_r2_0 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse0:.4f}, MAE: {train_mae0:.4f}, MAPE: {train_mape0:.4f}, R²: {train_r2_0:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse0:.4f}, MAE: {test_mae0:.4f}, MAPE: {test_mape0:.4f}, R²: {test_r2_0:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
30/30 ━━━━━━━━━━━━━━━━━━━━ 1s 18ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
Train MSE: 0.0663, RMSE: 0.2575, MAE: 0.1989, MAPE: 0.0252, R²: 0.9742
Test MSE: 0.5710, RMSE: 0.7556, MAE: 0.6125, MAPE: 0.0716, R²: 0.8293
In [354]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model0.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool_ns.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals Subset Non-Schengen(2024)")
plt.xlabel("Date")
plt.ylabel("Total Arrivals")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
45/45 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
In [355]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals for Non Schengen Countries (2024)', fontsize=16, y=1.02)
plt.show()
In [ ]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                  country           MSE          RMSE           MAE      MAPE  \
0  Bosnia and Herzegovina  4.089451e+08  20222.391154  13250.893027  0.470344   
1                  Canada  1.519023e+07   3897.463996   2464.991178  0.581352   
2                 Ireland  2.961782e+07   5442.225958   3381.523684  0.592970   
3                     USA  1.114507e+09  33384.228835  23627.993380  0.564387   
4          United Kingdom  2.290262e+09  47856.685346  31533.607595  0.891014   

         R2  
0  0.637028  
1  0.866758  
2  0.588191  
3  0.421547  
4  0.268385  
In [353]:
# Baseline predictions
baseline_preds = model0.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model0.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))


#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model0.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Arrivals (Non-Schengen countries 2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step

Subset of countries¶

In [362]:
# Load data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
cool = cool[~cool['country'].isin(['Russian Federation'])]

# Type conversions
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)

# Add Schengen and Euro info
schengen_entry_year = {
    'Austria': 1995, 'Belgium': 1995, 'Czech Republic': 2007, 'Denmark': 2001,
    'Finland': 1996, 'France': 1995, 'Germany': 1995, 'Hungary': 2007,
    'Italy': 1997, 'Netherlands': 1995, 'Norway': 2001, 'Poland': 2007,
    'Portugal': 1995, 'Slovakia': 2007, 'Slovenia': 2007, 'Spain': 1995,
    'Sweden': 2001, 'Switzerland': 2008, 'Romania': 2024,
}


cool['schengen_member'] = cool.apply(lambda row: int(row['year'] >= schengen_entry_year.get(row['country'], np.inf)), axis=1).astype('int8')

# Add Google Trends
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Log transform + lags
cool = cool.sort_values(['country', 'date']).drop_duplicates(subset=['country', 'date']).reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals'])
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])

cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool.drop(columns=['month_name']), ohe_month], axis=1)

cool['arrivals_lag_1'] = cool.groupby('country')['arrivals'].shift(1)
cool['arrivals_lag_3'] = cool.groupby('country')['arrivals'].shift(3)
cool['arrivals_lag_6'] = cool.groupby('country')['arrivals'].shift(6)
cool['arrivals_lag_12'] = cool.groupby('country')['arrivals'].shift(12)
cool['cpi_lag_1'] = cool.groupby('country')['cpi'].shift(1)
cool['unemp_rate_lag_3'] = cool.groupby('country')['unemployment_rate'].shift(3)
cool['google_trends_lag_1'] = cool.groupby('country')['google_trends'].shift(1)
cool['google_trends_lag_3'] = cool.groupby('country')['google_trends'].shift(3)

cool = cool[cool['date'] >= '2001-01-01']
cool_sub = cool[cool['country'].isin(['Germany', 'Italy', 'Poland', 'Slovenia', 'Switzerland'])].copy()
cool_sub = cool_sub[cool_sub['arrivals_next_month'].notna()].copy()

cool_sub['country_encoded'] = LabelEncoder().fit_transform(cool_sub['country'])
month_cols = [col for col in cool_sub.columns if col.startswith('month_')]

X_numeric = cool_sub[[
    'unemp_rate_lag_3', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted'
] + month_cols].values

X_country_array = cool_sub['country_encoded'].astype('int32').values
y = cool_sub['arrivals_next_month'].values

train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool_sub['date'] <= train_end
val_mask = (cool_sub['date'] > train_end) & (cool_sub['date'] <= val_end)
test_mask = cool_sub['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask.to_numpy()])
X_num_val = scaler.transform(X_numeric[val_mask.to_numpy()])
X_num_test = scaler.transform(X_numeric[test_mask.to_numpy()])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
embedding = Embedding(input_dim=len(np.unique(X_country_array)), output_dim=5)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)

model0 = Model(inputs=[input_numeric, input_country], outputs=output)
model0.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
history = model0.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16)

model0.summary()
Epoch 1/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 9s 17ms/step - loss: 99.8589 - val_loss: 87.8250
Epoch 2/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 89.2531 - val_loss: 79.5005
Epoch 3/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 78.0134 - val_loss: 64.5254
Epoch 4/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 64.2394 - val_loss: 46.4489
Epoch 5/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 48.8235 - val_loss: 31.7959
Epoch 6/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 30.2262 - val_loss: 16.8680
Epoch 7/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 17.0672 - val_loss: 8.3088
Epoch 8/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 9.8194 - val_loss: 3.9473
Epoch 9/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 5.7126 - val_loss: 2.6425
Epoch 10/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 4.5960 - val_loss: 2.3715
Epoch 11/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 3.9489 - val_loss: 2.1418
Epoch 12/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 4.2142 - val_loss: 2.1984
Epoch 13/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 3.3642 - val_loss: 2.1295
Epoch 14/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 3.4298 - val_loss: 2.3898
Epoch 15/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 3.0317 - val_loss: 2.1423
Epoch 16/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 3.0056 - val_loss: 2.0869
Epoch 17/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 3.1760 - val_loss: 2.1471
Epoch 18/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 2.8644 - val_loss: 2.0593
Epoch 19/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.5819 - val_loss: 1.9651
Epoch 20/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 2.6302 - val_loss: 2.0754
Epoch 21/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 2.3172 - val_loss: 1.9571
Epoch 22/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.6045 - val_loss: 1.9875
Epoch 23/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.3572 - val_loss: 2.0511
Epoch 24/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.2594 - val_loss: 1.9282
Epoch 25/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 2.1104 - val_loss: 1.9812
Epoch 26/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.9974 - val_loss: 2.0113
Epoch 27/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.0764 - val_loss: 1.9094
Epoch 28/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.1980 - val_loss: 1.9146
Epoch 29/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.1560 - val_loss: 1.9428
Epoch 30/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.0843 - val_loss: 1.8962
Epoch 31/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 2.0152 - val_loss: 1.9258
Epoch 32/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.8557 - val_loss: 1.9844
Epoch 33/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.8506 - val_loss: 1.9047
Epoch 34/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.8798 - val_loss: 1.9965
Epoch 35/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 1.5983 - val_loss: 1.9403
Epoch 36/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.8010 - val_loss: 1.8881
Epoch 37/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.6613 - val_loss: 1.9787
Epoch 38/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.7772 - val_loss: 1.9763
Epoch 39/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.5918 - val_loss: 1.9536
Epoch 40/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.8109 - val_loss: 1.9297
Epoch 41/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.7184 - val_loss: 1.9111
Epoch 42/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.6047 - val_loss: 1.9123
Epoch 43/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.5846 - val_loss: 1.9302
Epoch 44/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.6097 - val_loss: 1.8821
Epoch 45/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.6324 - val_loss: 1.9088
Epoch 46/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - loss: 1.5535 - val_loss: 1.9347
Epoch 47/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.5074 - val_loss: 1.9502
Epoch 48/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.6296 - val_loss: 1.9467
Epoch 49/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.4934 - val_loss: 1.9260
Epoch 50/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.5551 - val_loss: 1.8559
Epoch 51/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.4930 - val_loss: 1.9036
Epoch 52/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.3361 - val_loss: 1.8649
Epoch 53/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.4073 - val_loss: 1.8501
Epoch 54/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2863 - val_loss: 1.8864
Epoch 55/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.3986 - val_loss: 1.8456
Epoch 56/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 1.3960 - val_loss: 1.8984
Epoch 57/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 1.5584 - val_loss: 1.8663
Epoch 58/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2133 - val_loss: 1.8410
Epoch 59/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2921 - val_loss: 1.8973
Epoch 60/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2267 - val_loss: 1.8420
Epoch 61/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2583 - val_loss: 1.9192
Epoch 62/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1876 - val_loss: 1.8602
Epoch 63/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1679 - val_loss: 1.8681
Epoch 64/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.2488 - val_loss: 1.8632
Epoch 65/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2227 - val_loss: 1.9012
Epoch 66/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3660 - val_loss: 1.9421
Epoch 67/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2374 - val_loss: 1.8699
Epoch 68/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2288 - val_loss: 1.8792
Epoch 69/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2381 - val_loss: 1.9147
Epoch 70/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2093 - val_loss: 1.9194
Epoch 71/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2100 - val_loss: 1.9425
Epoch 72/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2448 - val_loss: 1.9157
Epoch 73/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2881 - val_loss: 1.8918
Epoch 74/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1497 - val_loss: 1.8765
Epoch 75/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1248 - val_loss: 1.9502
Epoch 76/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2107 - val_loss: 1.9071
Epoch 77/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.3737 - val_loss: 1.8920
Epoch 78/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.0983 - val_loss: 1.8943
Epoch 79/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1360 - val_loss: 1.9241
Epoch 80/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1182 - val_loss: 1.9082
Epoch 81/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.2238 - val_loss: 1.9105
Epoch 82/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0812 - val_loss: 1.8724
Epoch 83/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1690 - val_loss: 1.9216
Epoch 84/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step - loss: 1.1707 - val_loss: 1.9185
Epoch 85/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.3844 - val_loss: 1.8987
Epoch 86/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.0622 - val_loss: 1.8899
Epoch 87/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.1738 - val_loss: 1.8526
Epoch 88/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1046 - val_loss: 1.8631
Epoch 89/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2165 - val_loss: 1.9145
Epoch 90/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.0262 - val_loss: 1.8874
Epoch 91/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1927 - val_loss: 1.9025
Epoch 92/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1282 - val_loss: 1.9364
Epoch 93/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1627 - val_loss: 1.8884
Epoch 94/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1126 - val_loss: 1.9321
Epoch 95/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1513 - val_loss: 1.8814
Epoch 96/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1160 - val_loss: 1.8828
Epoch 97/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0667 - val_loss: 1.9086
Epoch 98/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.0442 - val_loss: 1.8893
Epoch 99/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1034 - val_loss: 1.8814
Epoch 100/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.1580 - val_loss: 1.8957
Model: "functional_40"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_40        │ (None, 1, 5)      │         25 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_40          │ (None, 5)         │          0 │ embedding_40[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_40      │ (None, 30)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_40[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_120 (Dense)   │ (None, 64)        │      1,984 │ concatenate_40[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_120[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_80          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_121 (Dense)   │ (None, 32)        │      2,080 │ dropout_80[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_121[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_81          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_122 (Dense)   │ (None, 1)         │         33 │ dropout_81[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 13,136 (51.32 KB)
 Trainable params: 4,314 (16.85 KB)
 Non-trainable params: 192 (768.00 B)
 Optimizer params: 8,630 (33.71 KB)
In [363]:
# Baseline predictions
baseline_preds = model0.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model0.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model0.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 1: Arrivals (2019)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
8/8 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step

Model Evaluation¶

In [364]:
## Model evalauation
# Predict and flatten
train_preds = model0.predict([X_num_train, X_cat_train]).flatten()
test_preds = model0.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse0 = np.sqrt(train_mse)
test_rmse0 = np.sqrt(test_mse)

#MAE
train_mae0 = mean_absolute_error(y_train, train_preds)
test_mae0 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape0 = mean_absolute_percentage_error(y_train, train_preds)
test_mape0 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_0 = r2_score(y_train, train_preds)
test_r2_0 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse0:.4f}, MAE: {train_mae0:.4f}, MAPE: {train_mape0:.4f}, R²: {train_r2_0:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse0:.4f}, MAE: {test_mae0:.4f}, MAPE: {test_mape0:.4f}, R²: {test_r2_0:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
23/30 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step30/30 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
Train MSE: 0.0673, RMSE: 0.2595, MAE: 0.2090, MAPE: 0.0223, R²: 0.9806
Test MSE: 0.4528, RMSE: 0.6729, MAE: 0.5146, MAPE: 0.0503, R²: 0.8526
In [365]:
# Global trend plot (total actual vs predicted per month)

y_pred_all = model0.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool_sub.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals Subset(2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
45/45 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
In [366]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals (2024)', fontsize=16, y=1.02)
plt.show()
In [367]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
       country           MSE           RMSE            MAE      MAPE        R2
0      Germany  5.353487e+10  231376.028628  139198.397871  0.511892  0.415365
1        Italy  1.070266e+09   32714.921875   15733.568666  0.368516  0.877356
2       Poland  7.211035e+09   84917.810718   50630.250026  0.458698  0.469374
3     Slovenia  2.133528e+09   46190.123747   34501.225409  0.574672  0.827616
4  Switzerland  1.087656e+08   10429.073923    6089.707087  0.318848  0.812500
In [ ]:
 

Subset of countries with overnights¶

In [370]:
# Load data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
cool = cool[~cool['country'].isin(['Russian Federation'])]

# Type conversions
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)

# Add Schengen and Euro info
schengen_entry_year = {
    'Austria': 1995, 'Belgium': 1995, 'Czech Republic': 2007, 'Denmark': 2001,
    'Finland': 1996, 'France': 1995, 'Germany': 1995, 'Hungary': 2007,
    'Italy': 1997, 'Netherlands': 1995, 'Norway': 2001, 'Poland': 2007,
    'Portugal': 1995, 'Slovakia': 2007, 'Slovenia': 2007, 'Spain': 1995,
    'Sweden': 2001, 'Switzerland': 2008, 'Romania': 2024,
}

cool['schengen_member'] = cool.apply(lambda row: int(row['year'] >= schengen_entry_year.get(row['country'], np.inf)), axis=1).astype('int8')

# Add Google Trends
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Log transform + lags
cool = cool.sort_values(['country', 'date']).drop_duplicates(subset=['country', 'date']).reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals'])
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])

cool['overnights_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool.drop(columns=['month_name']), ohe_month], axis=1)

cool['overnights_lag_1'] = cool.groupby('country')['arrivals'].shift(1)
cool['overnights_lag_3'] = cool.groupby('country')['arrivals'].shift(3)
cool['overnights_lag_6'] = cool.groupby('country')['arrivals'].shift(6)
cool['overnights_lag_12'] = cool.groupby('country')['arrivals'].shift(12)
cool['cpi_lag_1'] = cool.groupby('country')['cpi'].shift(1)
cool['unemp_rate_lag_3'] = cool.groupby('country')['unemployment_rate'].shift(3)
cool['google_trends_lag_1'] = cool.groupby('country')['google_trends'].shift(1)
cool['google_trends_lag_3'] = cool.groupby('country')['google_trends'].shift(3)

cool = cool[cool['date'] >= '2001-01-01']
cool_sub2 = cool[cool['country'].isin(['Germany', 'Italy', 'Poland', 'Slovenia', 'Switzerland','USA'])].copy()
cool_sub2 = cool_sub2[cool_sub2['overnights_next_month'].notna()].copy()

cool_sub2['country_encoded'] = LabelEncoder().fit_transform(cool_sub2['country'])
month_cols = [col for col in cool_sub2.columns if col.startswith('month_')]

X_numeric = cool_sub2[[
    'unemp_rate_lag_3', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted'] + month_cols].values

X_country_array = cool_sub2['country_encoded'].astype('int32').values
y = cool_sub2['overnights_next_month'].values

train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool_sub2['date'] <= train_end
val_mask = (cool_sub2['date'] > train_end) & (cool_sub2['date'] <= val_end)
test_mask = cool_sub2['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask.to_numpy()])
X_num_val = scaler.transform(X_numeric[val_mask.to_numpy()])
X_num_test = scaler.transform(X_numeric[test_mask.to_numpy()])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [371]:
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
embedding = Embedding(input_dim=len(np.unique(X_country_array)), output_dim=6)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)

model0 = Model(inputs=[input_numeric, input_country], outputs=output)
model0.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
history = model0.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16)

model0.summary()
Epoch 1/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 15s 23ms/step - loss: 96.2673 - val_loss: 83.4533
Epoch 2/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 82.6436 - val_loss: 67.1095
Epoch 3/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step - loss: 67.3150 - val_loss: 48.3680
Epoch 4/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 49.8187 - val_loss: 32.1284
Epoch 5/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 30.1898 - val_loss: 14.7346
Epoch 6/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 15.6023 - val_loss: 6.2273
Epoch 7/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 7.5336 - val_loss: 3.6870
Epoch 8/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 4.6136 - val_loss: 2.5168
Epoch 9/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 4.1666 - val_loss: 2.2894
Epoch 10/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 3.6290 - val_loss: 2.3527
Epoch 11/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 3.4119 - val_loss: 2.3391
Epoch 12/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 4s 51ms/step - loss: 3.8713 - val_loss: 2.3436
Epoch 13/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - loss: 3.3307 - val_loss: 2.2364
Epoch 14/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 14ms/step - loss: 2.7160 - val_loss: 2.2157
Epoch 15/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 3.0357 - val_loss: 2.1943
Epoch 16/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 3.1373 - val_loss: 2.1400
Epoch 17/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 16ms/step - loss: 2.4436 - val_loss: 2.1681
Epoch 18/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 2.6754 - val_loss: 2.1685
Epoch 19/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.3551 - val_loss: 2.0866
Epoch 20/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.6772 - val_loss: 2.0352
Epoch 21/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - loss: 2.1725 - val_loss: 1.9905
Epoch 22/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 2.4136 - val_loss: 2.1027
Epoch 23/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 2.3841 - val_loss: 2.1134
Epoch 24/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 2.0762 - val_loss: 2.1707
Epoch 25/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 2.2028 - val_loss: 1.9971
Epoch 26/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.7626 - val_loss: 2.0448
Epoch 27/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.9955 - val_loss: 2.0780
Epoch 28/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.9399 - val_loss: 2.0611
Epoch 29/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.7777 - val_loss: 2.0006
Epoch 30/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.9445 - val_loss: 1.9586
Epoch 31/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.7657 - val_loss: 2.0208
Epoch 32/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.9710 - val_loss: 2.0420
Epoch 33/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.6780 - val_loss: 1.9459
Epoch 34/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.6586 - val_loss: 1.9306
Epoch 35/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.5943 - val_loss: 1.9734
Epoch 36/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.4743 - val_loss: 2.0016
Epoch 37/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 1.7330 - val_loss: 1.9821
Epoch 38/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 20ms/step - loss: 1.6734 - val_loss: 1.9506
Epoch 39/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.6656 - val_loss: 1.8936
Epoch 40/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.4171 - val_loss: 1.9393
Epoch 41/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.3501 - val_loss: 1.9552
Epoch 42/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.5745 - val_loss: 2.0136
Epoch 43/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.6027 - val_loss: 1.9249
Epoch 44/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4072 - val_loss: 1.9168
Epoch 45/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2585 - val_loss: 1.9042
Epoch 46/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.3201 - val_loss: 1.9343
Epoch 47/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 1.3753 - val_loss: 1.9005
Epoch 48/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4154 - val_loss: 1.8944
Epoch 49/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.2585 - val_loss: 1.9297
Epoch 50/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.1731 - val_loss: 1.9718
Epoch 51/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3242 - val_loss: 1.9290
Epoch 52/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2209 - val_loss: 1.9211
Epoch 53/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2861 - val_loss: 1.9466
Epoch 54/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2940 - val_loss: 1.8893
Epoch 55/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.2450 - val_loss: 1.9128
Epoch 56/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2355 - val_loss: 1.9380
Epoch 57/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1741 - val_loss: 1.9609
Epoch 58/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2285 - val_loss: 1.9153
Epoch 59/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1282 - val_loss: 1.9195
Epoch 60/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1603 - val_loss: 1.9625
Epoch 61/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1549 - val_loss: 1.8665
Epoch 62/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1905 - val_loss: 1.9088
Epoch 63/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.0087 - val_loss: 1.9052
Epoch 64/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 0.9954 - val_loss: 1.9662
Epoch 65/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 12ms/step - loss: 1.1956 - val_loss: 1.9599
Epoch 66/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.1244 - val_loss: 1.8904
Epoch 67/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.1630 - val_loss: 1.9382
Epoch 68/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1095 - val_loss: 1.9623
Epoch 69/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0993 - val_loss: 1.9723
Epoch 70/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.0702 - val_loss: 1.9438
Epoch 71/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1175 - val_loss: 1.9137
Epoch 72/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0992 - val_loss: 1.9411
Epoch 73/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0389 - val_loss: 2.0290
Epoch 74/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.9781 - val_loss: 1.9440
Epoch 75/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 0.9713 - val_loss: 1.9440
Epoch 76/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.1828 - val_loss: 1.9852
Epoch 77/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 1.0950 - val_loss: 1.9141
Epoch 78/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - loss: 1.0716 - val_loss: 1.9159
Epoch 79/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.0330 - val_loss: 1.9094
Epoch 80/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1201 - val_loss: 1.8987
Epoch 81/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0059 - val_loss: 1.9590
Epoch 82/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1762 - val_loss: 1.9424
Epoch 83/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 1.0011 - val_loss: 1.9317
Epoch 84/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1152 - val_loss: 1.9488
Epoch 85/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.0686 - val_loss: 2.0011
Epoch 86/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0379 - val_loss: 1.9176
Epoch 87/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0014 - val_loss: 1.9522
Epoch 88/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9960 - val_loss: 1.9251
Epoch 89/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9819 - val_loss: 1.9313
Epoch 90/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0863 - val_loss: 1.9259
Epoch 91/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0982 - val_loss: 1.9510
Epoch 92/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1539 - val_loss: 1.9678
Epoch 93/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9057 - val_loss: 1.9402
Epoch 94/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.9511 - val_loss: 1.9875
Epoch 95/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9574 - val_loss: 1.9802
Epoch 96/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1070 - val_loss: 1.9393
Epoch 97/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.0748 - val_loss: 1.9470
Epoch 98/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.1290 - val_loss: 1.9775
Epoch 99/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1856 - val_loss: 1.9335
Epoch 100/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9632 - val_loss: 1.9449
Model: "functional_42"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_42        │ (None, 1, 6)      │         36 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_42          │ (None, 6)         │          0 │ embedding_42[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_42      │ (None, 31)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_42[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_126 (Dense)   │ (None, 64)        │      2,048 │ concatenate_42[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_126[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_84          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_127 (Dense)   │ (None, 32)        │      2,080 │ dropout_84[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_127[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_85          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_128 (Dense)   │ (None, 1)         │         33 │ dropout_85[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 13,361 (52.20 KB)
 Trainable params: 4,389 (17.14 KB)
 Non-trainable params: 192 (768.00 B)
 Optimizer params: 8,780 (34.30 KB)
In [372]:
# Baseline predictions
baseline_preds = model0.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model0.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model0.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 1: Overnights, subset of high-volume countries (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 57ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 60ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 

Model Evaluation¶

In [373]:
## Model evalauation
# Predict and flatten
train_preds = model0.predict([X_num_train, X_cat_train]).flatten()
test_preds = model0.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse0 = np.sqrt(train_mse)
test_rmse0 = np.sqrt(test_mse)

#MAE
train_mae0 = mean_absolute_error(y_train, train_preds)
test_mae0 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape0 = mean_absolute_percentage_error(y_train, train_preds)
test_mape0 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_0 = r2_score(y_train, train_preds)
test_r2_0 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse0:.4f}, MAE: {train_mae0:.4f}, MAPE: {train_mape0:.4f}, R²: {train_r2_0:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse0:.4f}, MAE: {test_mae0:.4f}, MAPE: {test_mape0:.4f}, R²: {test_r2_0:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
36/36 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
Train MSE: 0.0881, RMSE: 0.2968, MAE: 0.2366, MAPE: 0.0257, R²: 0.9726
Test MSE: 0.4140, RMSE: 0.6434, MAE: 0.4859, MAPE: 0.0480, R²: 0.8591
In [374]:
# Global trend plot (total actual vs predicted per month)

y_pred_all = model0.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool_sub2.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights Subset(2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
54/54 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step
In [375]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals (2024)', fontsize=16, y=1.02)
plt.show()
In [376]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
       country           MSE           RMSE            MAE      MAPE        R2
0      Germany  4.024194e+10  200603.944104  121130.643923  0.450907  0.560532
1        Italy  7.848145e+08   28014.540743   16760.034107  0.388984  0.910066
2       Poland  5.813232e+09   76244.554155   43257.363849  0.408261  0.572232
3     Slovenia  3.027006e+09   55018.231569   37428.719706  0.524595  0.755425
4  Switzerland  1.977036e+08   14060.710910    7427.436933  0.330616  0.659180
5          USA  8.089627e+08   28442.269892   19675.270503  0.460863  0.580131